import aFunctions
from importlib import reload
from statsmodels.formula.api import ols, logit
from statsmodels.api import add_constant
from sklearn.linear_model import LogisticRegression, LinearRegression
from sklearn.metrics import r2_score
from aFunctions import *
# Notebook-side plotting initialisation (helper comes from the star import).
init_notebook_mode(connected=True)

# Universe: SPY, TLT and the nine sector tickers listed below.
tickers = np.array([
    'SPY', 'TLT',
    'XLY', 'XLP', 'XLE', 'XLF', 'XLV', 'XLI', 'XLB', 'XLK', 'XLU',
])
np.log([1.02, .98, .88, 1.08, 1.24])

# Re-import the helper module so edits made to it are picked up.
reload(aFunctions)
from aFunctions import *

start_date = datetime(2004, 1, 1)
end_date = datetime.today()
df = GetMkt(tickers, start_date, end_date, freq='daily', bmark_tick='SPY')

# Forward 63-period 'relRtn' target for every ticker except SPY, then joined
# back onto the full market frame by (date, ticker).
sides = ForwardTarget(df, tickers[tickers != 'SPY'], 'relRtn', f=63)
sides = df.reset_index(drop=True).merge(
    sides.reset_index(drop=True),
    on=['date', 'ticker'],
    how='left',
)

list(df.columns)

# Report every column that contains at least one missing value.
for column in df:
    _missing = df[column].isnull()
    if _missing.any():
        print('{0} has {1} null values'.format(column, _missing.sum()))

# Per-ticker summary tables (bare expressions: only displayed in a notebook).
_by_ticker = df.pivot(columns='ticker')
(_by_ticker['relRtnH252'] * 252 * 100).describe().round(2)
(_by_ticker['clRtnH252'] * 252 * 100).describe().round(2)
(_by_ticker['relHv126'] * 100).describe().round(2)
(_by_ticker['clHv126'] * 100).describe().round(2)
# Global seaborn look for the line plots below.
sns.set_style("darkgrid")
sns.set(rc={'figure.figsize': (10, 6)})
sns.set_context("notebook", font_scale=1.5, rc={"lines.linewidth": 2.5})

# One line plot per (column, title) pair, coloured by ticker.
# NOTE(review): no new figure is opened between plots, so outside a notebook
# (one plot per cell) these calls draw onto the same current axes - confirm
# that is acceptable when running this file as a plain script.
for _field, _heading in (
        ('relRtn', "Daily Relative Returns"),
        ('adjClose', "Close Adjusted Prices"),
        ('Pxln', "Absolute Returns"),
        ('relPxln', "Rel to SPY Returns"),
):
    snslp = sns.lineplot(x='date', y=_field, hue="ticker", data=df)
    plt.title(_heading, weight='bold', size='large')
# Augmented Dickey-Fuller results for each ticker and series. While the table
# is being filled there is one column per "<ticker> - <label>"; it is
# transposed afterwards so that each test becomes a row.
adf = pd.DataFrame([], index=['ADF', 'pval', 'ulag', 'n', 'cval'])

# (source column, label used in the result name), in output order.
_adf_series = (
    ('Pxln', 'Price'),
    ('clRtn', 'Absolute'),
    ('relRtn', 'Relative'),
    ('relPxln', 'CIX'),
)
for i in tickers:
    _rows = df[df['ticker'] == i]
    for _field, _label in _adf_series:
        # SPY is only tested on its own price and return series; the two
        # relative series are skipped for it.
        if i == 'SPY' and _field in ('relRtn', 'relPxln'):
            continue
        # With autolag=None and regresults=False adfuller returns five items:
        # statistic, p-value, used lag, nobs and the critical-value dict.
        t = list(adfuller(_rows[_field], maxlag=1, regression='c',
                          autolag=None, regresults=False))
        adf[i + ' - ' + _label] = t[:5]

adf = adf.T

# Broadcast one set of critical values to every row. The rows are addressed
# by position with .iloc: the index holds string labels, and integer keys via
# plain [] rely on a deprecated positional fallback.
_crit = adf['cval']
adf['1%'] = np.round(_crit.iloc[0]['1%'], 3)
adf['5%'] = np.round(_crit.iloc[1]['5%'], 3)
adf['10%'] = np.round(_crit.iloc[2]['10%'], 3)

# NOTE(review): the data above is requested with freq='daily'; the "Weekly"
# wording in this heading may be stale - confirm before changing the text.
print('ADF Test Results on Weekly Returns:')
adf.drop('cval', axis=1)
# From here on the universe is the nine sector tickers only (no SPY, no TLT).
tickers = np.array(['XLY', 'XLP', 'XLE', 'XLF', 'XLV', 'XLI', 'XLB', 'XLK', 'XLU'])

# Collect the per-ticker results and concatenate once: DataFrame.append was
# removed in pandas 2.0 and re-copies the whole frame on every iteration.
# Assumes plotMinFFD returns a DataFrame (it is used as one below) - TODO confirm.
# NOTE(review): 'rreturns' does not follow the 'relRtn'/'relPxln' naming used
# elsewhere in this file - verify the column exists in df.
_cutoff_frames = []
for i in tickers:
    o = plotMinFFD(df[df['ticker'] == i]['rreturns'])
    o['ticker'] = i
    _cutoff_frames.append(o)
FFD_cutoff = pd.concat(_cutoff_frames, ignore_index=True)

FFD_cutoff.head()
FFD_cutoff.to_csv('data/FWW-FD-relReturns-cutoffs-d.csv')
# Rows whose 'Diff' value rounds to 0.4 (bare expression: notebook display).
FFD_cutoff[np.round(FFD_cutoff['Diff'], 2) == .4]
# Threshold handed to fracDiff_FFD as `thres`.
t = 1e-2
# Lower date bounds for the three ADF runs; None means the whole sample.
_adf_starts = (None, datetime(2010, 1, 1), datetime(2015, 1, 1))

_ffd_frames = []
print('Fractional feature, ADF test results post transformation (All Data, 2010+, 2015+)')
for i in tickers[tickers != 'SPY']:
    # Fractionally differentiate the relative price series with d = 0.4.
    ff = fracDiff_FFD(df[df['ticker'] == i]['relPxln'],
                      .4,
                      thres=t).to_frame()
    ff['ticker'] = i
    _ffd_frames.append(ff)
    print(i, ff.shape)

    # ADF statistic of the transformed series on each sample window.
    _stats = []
    for _start in _adf_starts:
        if _start is None:
            _sample = ff['relPxln']
        else:
            _sample = ff.loc[ff.index > _start]['relPxln']
        _stats.append(np.round(adfuller(_sample, maxlag=1,
                                        regression='c', autolag=None)[0], 2))
    print('ADF Test Stat', *_stats)

# Single concat instead of DataFrame.append (removed in pandas 2.0); the
# original index is kept, as before.
FFD_df = pd.concat(_ffd_frames)
# The transformed column is named 'relPxln' (it is read under that name in
# the loop above), so that is the key to rename; the previous key "Pxln"
# matched nothing and the rename silently did nothing.
FFD_df = FFD_df.rename(columns={"relPxln": "MeanRev-FF"})
FFD_df['date'] = FFD_df.index
def createFeatures(df, tickers, cRetFld, dRetFld, dFrac, ids, drops, relFlds = False):
    """Build a scaled per-ticker feature table and its long-form (melted) twin.

    For every ticker the function derives trend, RSI, Bollinger, Aroon, MACD,
    fractional-differencing, volatility and momentum columns, joins a set of
    features computed from the 'TLT' rows of ``df``, drops incomplete rows and
    min-max scales the result. The per-ticker tables are stacked and the
    identifier columns are merged back on ('period', 'ticker').

    Parameters
    ----------
    df : DataFrame
        Must hold 'ticker' and 'period' columns, ``cRetFld``, ``dRetFld``, the
        'clRtnH*' / 'clHv*' / 'Pxln' columns read below and, when ``relFlds``
        is set, the matching 'relRtnH*' / 'relHv*' columns.
    tickers : sequence
        Ticker symbols to build features for (must support ``len``).
    cRetFld : str
        Column fed to the fractional differencing, EWM-slope, RSI, Aroon and
        MACD helpers.
    dRetFld : str
        Column fed to the Bollinger band indicators.
    dFrac : float or list
        Differencing order; a list supplies one order per ticker, in order.
    ids : list
        Identifier columns: excluded from the features, merged back from
        ``df`` afterwards and used as ``id_vars`` for the melt.
    drops : list
        Further columns of ``df`` to exclude from the features.
    relFlds : bool, optional
        Also build the '-Rel' volatility and momentum features.

    Returns
    -------
    (mdata, mmdata)
        ``mdata`` is the wide table indexed by 'period'; ``mmdata`` is
        ``mdata`` melted to one row per (ids..., variable, value).

    Raises
    ------
    TypeError
        ``dFrac`` is neither a float nor a list.
    ValueError
        ``dFrac`` is a list whose length differs from ``tickers``.
        Both are ``Exception`` subclasses, so ``except Exception`` still works.
    """
    # isinstance (not an exact type() match) also accepts float subclasses
    # such as numpy.float64.
    if not isinstance(dFrac, (list, float)):
        raise TypeError('wrong fractional degree of differentation input')
    if isinstance(dFrac, list):
        if len(tickers) != len(dFrac):
            raise ValueError('dFrac shape != Tickers')
        degrees = dFrac
    else:
        degrees = [dFrac] * len(tickers)

    # Fixed Income Applied to all
    ficols = ['clRtnH252', 'clRtnH126', 'clRtnH021', 'clRtnH010',
              'clHv010', 'clHv126', 'Pxln']
    # .copy() so the column assignments below write to an independent frame
    # instead of a chained-indexing slice of df.
    fidata = df[df['ticker'] == 'TLT'][ficols].copy()
    fidata['FIst6mRealized'] = fidata['clHv010'] - fidata['clHv126']
    fidata['FIRet12m1m'] = fidata['clRtnH252'] - fidata['clRtnH021']
    # NOTE(review): named "3m" but built from the H126 column - confirm.
    fidata['FIRet3m2w'] = fidata['clRtnH126'] - fidata['clRtnH010']
    FI_FFD_df = fracDiff_FFD(fidata['Pxln'], .5,
                             thres=1e-2).to_frame().rename(columns={'Pxln': "FIFF"})
    fidata = fidata.merge(FI_FFD_df, how='left', left_index=True, right_index=True)
    fidata['FIRSI14d'], fidata['FIRSIs14d'] = RSI(fidata['Pxln'], window_length = 14, MA = 'S')
    fidata['FIRSI63d'], fidata['FIRSIs63d'] = RSI(fidata['Pxln'], window_length = 63, MA = 'S')
    fidata = fidata[['FIst6mRealized', 'FIRet12m1m', 'FIRet3m2w', 'FIFF', 'FIRSI14d', 'FIRSIs14d',
                     'FIRSI63d', 'FIRSIs63d']]

    # Raw feature columns: everything in df that is neither an id nor dropped.
    excluded = set(ids) | set(drops)
    feature_cols = [x for x in df.columns if x not in excluded]

    frames = []
    for t, degree in zip(tickers, degrees):
        rows = df[df['ticker'] == t]
        tdata = rows[feature_cols].copy()
        tdf = rows[cRetFld]
        ddf = rows[dRetFld]
        FFD_df = fracDiff_FFD(tdf, degree,
                              thres=1e-2).to_frame().rename(columns={cRetFld: "MeanRevFF"})

        # Create Signals
        for span in (10, 50, 100, 200):
            tdata['TrendMASlope{0}'.format(span)] = (
                tdf.ewm(span=span).mean().diff().rolling(window=5, min_periods=5).mean())
        tdata['MeanRevRSI14d'], tdata['TrendRSIs14d'] = RSI(tdf, window_length = 14, MA = 'S')
        tdata['MeanRevRSI63d'], tdata['TrendRSIs63d'] = RSI(tdf, window_length = 63, MA = 'S')
        tdata['TrendhBma'] = bollinger_hband_indicator(ddf, n = 20).ewm(span=50).mean()
        tdata['TrendlBma'] = (bollinger_lband_indicator(ddf, n = 20)*-1).ewm(span=50).mean()
        tdata['TrendabsBma'] = (tdata['TrendhBma'] - tdata['TrendlBma']).abs()
        tdata['aroon_up'] = aroon_up(tdf, n=50)
        tdata['aroon_down'] = aroon_down(tdf, n=50)
        tdata['aroon_diff'] = tdata['aroon_up'] - tdata['aroon_down']
        tdata['MACD_12_26'] = macd(tdf, n_fast=12, n_slow=26)
        tdata['MACD_diff'] = macd_diff(tdf, n_fast=12, n_slow=26, n_sign=9)
        tdata['MACD_sign'] = macd_signal(tdf, n_fast=12, n_slow=26, n_sign=9)
        tdata['MACD_36_78'] = macd(tdf, n_fast=36, n_slow=78)
        tdata['MACD_sign3x'] = macd_signal(tdf, n_fast=36, n_slow=78, n_sign=27)

        tdata = tdata.merge(FFD_df, how='left', left_index=True, right_index=True)
        tdata['MeanRevFFEWMA1'] = tdata['MeanRevFF'].ewm(span=20).mean()
        tdata['MeanRevFFEWMA2'] = tdata['MeanRevFF'].ewm(span=200).mean()
        tdata['MeanRevFFDiff'] = tdata['MeanRevFF'] - tdata['MeanRevFFEWMA1']

        # Vol Features
        tdata['st6mRealized'] = tdata['clHv010'] - tdata['clHv126']

        # Return Momentum Features
        tdata['Ret12m1m'] = tdata['clRtnH252'] - tdata['clRtnH021']
        tdata['Ret12m6m'] = tdata['clRtnH252'] - tdata['clRtnH126']
        tdata['Ret6m1m'] = tdata['clRtnH126'] - tdata['clRtnH021']
        tdata['Ret6m2w'] = tdata['clRtnH126'] - tdata['clRtnH010']
        # NOTE(review): identical to Ret6m2w; a 3m window probably needs a
        # different horizon column - left unchanged until confirmed.
        tdata['Ret3m2w'] = tdata['clRtnH126'] - tdata['clRtnH010']

        if relFlds:
            tdata['st6mRealized-Rel'] = tdata['relHv010'] - tdata['relHv126']
            tdata['Ret12m1m-Rel'] = tdata['relRtnH252'] - tdata['relRtnH021']
            tdata['Ret12m6m-Rel'] = tdata['relRtnH252'] - tdata['relRtnH126']
            tdata['Ret6m1m-Rel'] = tdata['relRtnH126'] - tdata['relRtnH021']
            tdata['Ret6m2w-Rel'] = tdata['relRtnH126'] - tdata['relRtnH010']
            # NOTE(review): same formula as Ret6m2w-Rel (see Ret3m2w above).
            tdata['Ret3m2w-Rel'] = tdata['relRtnH126'] - tdata['relRtnH010']

        tdata = tdata.merge(fidata, left_index = True, right_index = True)
        cols = list(tdata.columns)
        tdata = tdata.dropna()
        # The scaler is fit on all remaining rows of this ticker at once.
        scaler = MinMaxScaler() #MinMaxScaler StandardScaler
        tdata[cols] = scaler.fit_transform(tdata)
        tdata['ticker'] = t
        # Aligned on the index, which tdata still shares with the df rows.
        tdata['period'] = rows['period']
        frames.append(tdata)

    # One concat instead of DataFrame.append in the loop (removed in pandas
    # 2.0); an empty ticker list still yields an empty frame as before.
    mdata = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame([])

    # Re-attach the identifier columns by (period, ticker).
    mdata = mdata.merge(df[ids].reset_index(drop=True),
                        on=['period', 'ticker'],
                        how='left')
    mdata.index = mdata['period']

    # Melt
    mmdata = pd.melt(mdata,
                     id_vars=ids,
                     value_vars=[x for x in list(mdata.columns) if x not in ids])
    return mdata, mmdata
# Name of the forward-return target column.
retwindow = 'relRtnF063'

# Give the target columns their final names and keep complete rows only.
sides = sides.rename(
    columns={'side': "Side" + retwindow, 'forward': retwindow}
).dropna()

# Two rescaled copies of the forward return: min-max and standardised.
_target = sides[retwindow].values.reshape(-1, 1)
scaler = MinMaxScaler()
sides['forwardmmS'] = scaler.fit_transform(_target)
scaler = StandardScaler()
sides['forwardstdS'] = scaler.fit_transform(_target)

sides.index = sides['period']
print(sides.shape)

# Identifier columns carried alongside the features.
ids_ = [
    'period', 'ticker', 'forwardmmS', 'forwardstdS',
    retwindow, 'Side' + retwindow, 'relRtn', 'clRtn',
]
# Columns of `sides` that must not be used as features.
drops_ = [
    'Pxln', 'relPxln',
    'open', 'high', 'low', 'close',
    'adjClose', 'adjHigh', 'adjLow', 'adjOpen', 'adjVolume',
    'divCash', 'index', 'splitFactor', 'volume', 'date',
]

mdata, mmdata = createFeatures(
    sides,
    tickers[tickers != 'SPY'], 'relPxln', 'clRtn',
    .4, ids_, drops_,
    relFlds=True,
)
print(mdata.shape)
print(mmdata['variable'].unique())
mmdata.head()
# 22-colour cubehelix palette, previewed and then used for the grid below.
cmap = sns.cubehelix_palette(22, start=1, rot=-.80, dark=.3, light=.7)
sns.palplot(cmap)

sns.set_style("darkgrid")
sns.set(rc={'figure.figsize': (40, 40)})
sns.set_context("notebook", font_scale=1.5, rc={"lines.linewidth": 2.5})

# One regression scatter per feature ('variable'), three panels per row,
# each with its own axis ranges.
_grid_options = dict(
    col="variable",
    hue="variable",
    palette=cmap,
    height=8,
    col_wrap=3,
    sharex=False,
    sharey=False,
    margin_titles=True,
)
g = sns.FacetGrid(mmdata, **_grid_options)
g.map(sns.regplot, 'value', 'forwardmmS')
g.savefig('plots/RelativeReturnsScatterPlots-3M.png')
# Univariate OLS of the min-max scaled forward return on each feature;
# keeps the slope p-value, R-squared and slope coefficient.
df_stats = pd.DataFrame([], columns=['pval', 'rsquared', 'coef'])
for i in mmdata.variable.unique():
    # Q("...") quotes the column name for the formula parser. Without it a
    # name such as 'st6mRealized-Rel' is read as `st6mRealized - Rel` and the
    # fit fails on the unknown term.
    form = 'forwardmmS ~ Q("{0}")'.format(i)
    model = ols(form, mdata).fit()
    # Position 1 is the slope (position 0 is the intercept). .iloc is used
    # because these Series are indexed by term name, and integer keys via
    # plain [] rely on a deprecated positional fallback.
    df_stats.loc[i, :] = [np.round(model.pvalues.iloc[1], 4),
                          np.round(model.rsquared, 4),
                          np.round(model.params.iloc[1], 3)]
df_stats.sort_values(by=['rsquared'], ascending=False)
# Persist the wide feature table and the list of feature names.
mdata.to_csv('data/mdata.csv')
# `mmdatar` is not defined anywhere in this file; the melted frame produced
# above is `mmdata`, so the feature names are read from it.
cols = list(mmdata['variable'].unique())
with open('data/fnamesR.txt', 'w') as f:
    # One feature name per line.
    f.writelines(s + '\n' for s in cols)
# IPython shell escape (leading "!"): exports EDA_s0.ipynb to HTML via nbconvert.
# This line is only valid when executed by IPython/Jupyter, not by plain Python.
!jupyter nbconvert --to html EDA_s0.ipynb